#!/cray/css/libsci/bavier/bin/guile -s
!#

(define-module (data-mining indexed-matrix)
  #:use-module (srfi srfi-9)
  #:use-module (srfi srfi-26)
  #:use-module (data-mining hash-util)
  #:export (make-indexed-matrix
	    make-indexed-matrix/shared

	    indexed-matrix-set!
	    indexed-matrix-ref
	    indexed-matrix-handle

	    indexed-matrix-length
	    indexed-matrix-width

	    indexed-matrix-row-tags
	    indexed-matrix-column-tags

	    indexed-matrix-for-each-row
	    indexed-matrix-for-each-column
	    indexed-matrix-for-each-entry

	    indexed-matrix-map-rows
	    indexed-matrix-map-columns
	    indexed-matrix-map-indexed-rows
	    indexed-matrix-map-indexed-columns

	    indexed-matrix->row-list
	    indexed-matrix->column-list
	    indexed-matrix->array

	    indexed-matrix-row-entries
	    indexed-matrix-indexed-row
	    indexed-matrix-column-entries
	    indexed-matrix-indexed-column

	    indexed-matrix-reindex-row!
	    indexed-matrix-reindex-column! 
	    ))

(define-record-type indexed-matrix
  (make-indexed-matrix* row-tags-table col-tags-table rows cols)
  indexed-matrix?
  (row-tags-table row-tags-table      set-row-tags-table!)
  (col-tags-table col-tags-table      set-col-tags-table!)
  (rows           indexed-matrix-rows set-indexed-matrix-rows!)
  (cols           indexed-matrix-cols set-indexed-matrix-cols!))

(define* (make-indexed-matrix #:key
			      (row-tags '())
			      (column-tags '()))
  (let ((m (make-indexed-matrix*
	    ;; We use hash-table's as sets for row-tags and col-tags
	    (make-hash-table)		;row-tags-table
	    (make-hash-table)		;col-tags-table
	    (make-hash-table)		;rows
	    (make-hash-table)		;cols
	    )))
    (set-indexed-matrix-row-tags! m row-tags)
    (set-indexed-matrix-column-tags! m column-tags)
    m))

;;; Make a new shared indexed-matrix.  The new indexed-matrix will
;;; share underlying storage with M, but will only operate on the row
;;; indices in I and column indices in J.  New entries added to the
;;; created indexed-matrix will not be seen by M, but changing an
;;; existing entry *will* be seen by M.  If the I and J arguments are
;;; not given then all entries of the row or column, respecitively,
;;; will be shared in the new indexed-matrix.
(define* (make-indexed-matrix/shared
	  m
	  #:key
	  (row-indices (indexed-matrix-row-tags m))
	  (column-indices (indexed-matrix-column-tags m)))
  (let ((n (make-indexed-matrix
	    #:row-tags row-indices
	    #:column-tags column-indices)))
    (set-indexed-matrix-rows! n (indexed-matrix-rows m))
    (set-indexed-matrix-cols! n (indexed-matrix-cols m))
    n))

(define-public (indexed-matrix-set! m e i j)
  (let ((rh (hash-create-handle! (indexed-matrix-rows m) i (make-hash-table)))
	(ch (hash-create-handle! (indexed-matrix-cols m) j (make-hash-table))))
    (begin
      (hash-set! (row-tags-table m) i i)
      (hash-set! (col-tags-table m) j j)
      (hash-set! (cdr rh) j e)
      (hash-set! (cdr ch) i e))))

;;; Return the tuple (i j . e) or #f if no entry
(define-public (indexed-matrix-handle m i j)
  (let ((rh (hash-get-handle (indexed-matrix-rows m) i)))
    (and rh (let ((ch (hash-get-handle (cdr rh) j)))
	      (and ch (cons (car rh) ch))))))

(define-public (indexed-matrix-ref m i j)
  (let ((rh (hash-get-handle (indexed-matrix-rows m) i)))
    (and rh (hash-ref (cdr rh) j))))

(define-public (indexed-matrix-for-each-row proc m)
  ;; For each row i of the indexed matrix, call (proc i <entries>),
  ;; where <entries> is the list of entries in that row.  The order in
  ;; which is the rows are traversed is not guaranteed.
  (for-each				;TODO: formulate in terms of hash-for-each
   (lambda (i)
     (proc i (indexed-matrix-row-entries m i)))
   (indexed-matrix-row-tags m)))

(define-public (indexed-matrix-for-each-column proc m)
  (for-each				;TODO: formulate in terms of hash-for-each
   (lambda (j)
     (proc j (indexed-matrix-column-entries m j)))
   (indexed-matrix-column-tags m)))

;;; proc is called as (i j e), where i is the row tag and j is the column tag
;;; associated with the entry e.
(define-public (indexed-matrix-for-each-entry proc m)
  (for-each
   (lambda (i)
     (let ((rh (hash-get-handle (indexed-matrix-rows m) i)))
       (if rh
	   (for-each
	    (lambda (j)
	      (let ((ch (hash-get-handle (cdr rh) j)))
		(if ch
		    (proc i j (cdr ch)))))
	    (indexed-matrix-column-tags m)))))
   (indexed-matrix-row-tags m)))

(define-public (indexed-matrix-map-rows proc m)
  (map
   (lambda (i)
     (proc i (indexed-matrix-row-entries m i)))
   (indexed-matrix-row-tags m)))

(define-public (indexed-matrix-map-columns proc m)
  (map
   (lambda (j)
     (proc j (indexed-matrix-column-entries m j)))
   (indexed-matrix-column-tags m)))

(define-public (indexed-matrix-map-indexed-rows proc m)
  (map
   (lambda (i)
     (proc i (indexed-matrix-indexed-row m i)))
   (indexed-matrix-row-tags m)))

(define-public (indexed-matrix-map-indexed-columns proc m)
  (map
   (lambda (j)
     (proc j (indexed-matrix-indexed-column m j)))
   (indexed-matrix-column-tags m)))

;;; Return a dense representation of this indexed-matrix in row-major list
;;; form.  Index information is lost through this transformation.
;;; Non-existent entries appear as #f, which will conflict with explicit #f
;;; entries if the matrix is not already known to be dense/completely filled.
(define-public (indexed-matrix->row-list m)
  (map
   (lambda (col)
     (map (cut hash-ref col <>)
	  (indexed-matrix-column-tags m)))
   (map (cut hash-ref (indexed-matrix-rows m) <>)
	(indexed-matrix-row-tags m))))

(define-public (indexed-matrix->column-list m)
  (map
   (lambda (row)
     (map (cut hash-ref row <>)
	  (indexed-matrix-row-tags m)))
   (map (cut hash-ref (indexed-matrix-cols m) <>)
	(indexed-matrix-column-tags m))))

;;; Return a list of the entries in the given row.  Returns #f if I is an
;;; invalid row index.
(define-public (indexed-matrix-row-entries m i)
  (let ((rh (hash-get-handle (indexed-matrix-rows m) i)))
    (and rh (map (cut hash-ref (cdr rh) <>)
		 (indexed-matrix-column-tags m)))))

;;; Return an association list whose keys are the column tags and values are
;;; the entries those tags map to.
;;;
;;; E.g. If M is ::
;;; 
;;;        a b c
;;;        - - -
;;;    1 : 1 2 3
;;;    2 : 4 5 6
;;;
;;; then
;;;
;;; guile> (indexed-matrix-indexed-row m 2)
;;;   => ((a . 4) (b . 5) (c . 6))
;;;   
(define-public (indexed-matrix-indexed-row m i)
  (let ((rh (hash-get-handle (indexed-matrix-rows m) i)))
    (and rh (map (lambda (c)
		   (cons c (hash-ref (cdr rh) c)))
		 (indexed-matrix-column-tags m)))))

;;; Return a list of the entries in the given column.  Returns #f if J is an
;;; invalid column index.
(define-public (indexed-matrix-column-entries m j)
  (let ((ch (hash-get-handle (indexed-matrix-cols m) j)))
    (and ch (map (cut hash-ref (cdr ch) <>)
		 (indexed-matrix-row-tags m)))))

;;; Analogous to indexed-matrix-indexed-row, but column-wise
(define-public (indexed-matrix-indexed-column m j)
  (let ((ch (hash-get-handle (indexed-matrix-cols m) j)))
    (and ch (map (lambda (c)
		   (cons c (hash-ref (cdr ch) c)))
		 (indexed-matrix-row-tags m)))))

;;; Change all references to the row index I to use NI instead.
(define-public (indexed-matrix-reindex-row! m i ni)
  (hash-move-key (row-tags-table m) i ni (make-hash-table))
  (hash-move-key (indexed-matrix-rows m) i ni (make-hash-table))
  ;; Update references in the column hashes
  (hash-for-each (lambda (j jv) (hash-move-key jv i ni))
		 (indexed-matrix-cols m)))

;;; Change all references to the column index J to use NJ instead.
(define-public (indexed-matrix-reindex-column! m j nj)
  (hash-move-key (col-tags-table m) j nj (make-hash-table))
  (hash-move-key (indexed-matrix-cols m) j nj (make-hash-table))
  ;; Update reference in the row hashes
  (hash-for-each (lambda (i iv) (hash-move-key iv j nj))
		 (indexed-matrix-rows m)))

;;; Transform the indexed-matrix into a 2-dimensional scheme array.  The row
;;; and column permutations are based on the order of the indices as they are
;;; found in (row-tags M) and (col-tags M), as this information cannot be
;;; stored in the resulting array.
(define-public (indexed-matrix->array m)
  (let ((a (make-array #f
		       (indexed-matrix-length m)
		       (indexed-matrix-width m)))
	(row-map (list->index-map (indexed-matrix-row-tags m)))
	(col-map (list->index-map (indexed-matrix-column-tags m))))
    (begin
     (indexed-matrix-for-each-entry
      (lambda (i j e)
	(array-set! a e (assoc-ref row-map i) (assoc-ref col-map j)))
      m)
     a)))

;;; Given a list, create an association list which maps each element of the
;;; given list to the index at which that element resides in the list.
(define-public (list->index-map lst)
  ;; An iterative algorithm allows us to traverse the list only once.  The
  ;; other alternative is (map cons lst (iota (length lst))), but requires two
  ;; list traversals.
  (define (iter c l a)
    (if (null? l)
	a
	(iter (1+ c) (cdr l) (cons (cons (car l) c) a))))
  (iter 0 lst '()))

(define-public (indexed-matrix-length m)
  (hash-table-size (row-tags-table m)))

(define-public (indexed-matrix-width m)
  (hash-table-size (col-tags-table m)))

(define (first-arg  a . b) a)
(define (second-arg a b . c) b)
(define (third-arg  a b c . d) c)

;; Provide transparent list-based access to internal hash sets.
(define (indexed-matrix-row-tags m)
  (hash-map->list second-arg (row-tags-table m)))
(define (set-indexed-matrix-row-tags! m rt)
  (let ((rtt (make-hash-table)))
    (for-each (lambda (t) (hash-set! rtt t t)) rt)
    (set-row-tags-table! m rtt)))

(define (indexed-matrix-column-tags m)
  (hash-map->list second-arg (col-tags-table m)))
(define (set-indexed-matrix-column-tags! m ct)
  (let ((ctt (make-hash-table)))
    (for-each (lambda (t) (hash-set! ctt t t)) ct)
    (set-col-tags-table! m ctt)))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Tests

(use-modules (srfi srfi-64)
	     (srfi srfi-1)
	     (ice-9 format)
	     (data-mining test-util))

(test-begin "indexed-matrix-test")
(define im (make-indexed-matrix))
(test-assert (indexed-matrix? im))

;;; If we set and then ref, we should get back what we set
;;;      1      2
;;; "a" 10   "hi"
;;; "b" 20   "there"
(indexed-matrix-set! im 10 "a" 1)
(test-assert (eq? (indexed-matrix-ref im "a" 1) 10))
(indexed-matrix-set! im "hi" "a" 2)
(test-assert (eq? (indexed-matrix-ref im "a" 2) "hi"))
(indexed-matrix-set! im 20 "b" 1)
(test-assert (eq? (indexed-matrix-ref im "b" 1) 20))
(indexed-matrix-set! im "there" "b" 2)
(test-assert (eq? (indexed-matrix-ref im "b" 2) "there"))

;;; Check dimensions
(test-assert (eq? (indexed-matrix-width im) 2))
(test-assert (eq? (indexed-matrix-length im) 2))

;;; Check indexed-matrix-handle
(define im_a1-handle (indexed-matrix-handle im "a" 1))
(test-assert (eq? (car im_a1-handle) "a"))
(test-assert (eq? (cadr im_a1-handle) 1))
(test-assert (eq? (cddr im_a1-handle) 10))
(test-assert (not (indexed-matrix-handle im "foo" 9000))) ;return #f for invalid

;;; XXX: This test is fragile because it depends on the order in which
;;; row entries are returned by indexed-matrix-row-entries.
(define row-globs (make-list (indexed-matrix-length im) #f))
(indexed-matrix-for-each-row
 (let ((row-num 0))
   (lambda (i es)
     (list-set! row-globs row-num
		(format #f "~{~a~}" es))
     (set! row-num (1+ row-num))))
 im)
(test-assert (member "hi10" row-globs))
(test-assert (member "there20" row-globs))

;;; Check row-tags, col-tags
(test-assert ((list-permutation? '("a" "b")) (indexed-matrix-row-tags im)))
(test-assert ((list-permutation? '(1 2)) (indexed-matrix-column-tags im)))

;;; Check for-each-entry
(define entries-seen '())
(indexed-matrix-for-each-entry
 (lambda (i j e)
   (test-assert (indexed-matrix-handle im i j)) ;there is an entry at this point
   (test-assert (indexed-matrix-ref im i j) e) ;that entry is the same
   (set! entries-seen (cons e entries-seen)))
 im)
(test-assert (null? (lset-difference equal? '(10 20 "hi" "there") entries-seen)))

;;; Check row-entries
(test-assert (null? (lset-difference equal? '(10 "hi")
				     (indexed-matrix-row-entries im "a"))))
(test-assert (null? (lset-difference equal? '(20 "there")
				     (indexed-matrix-row-entries im "b"))))

;;; Check row-list and column-list
(define rl (indexed-matrix->row-list im))
(test-assert (find (list-permutation? '(10 "hi")) rl))
(test-assert (find (list-permutation? '(20 "there")) rl))
(define cl (indexed-matrix->column-list im))
(test-assert (find (list-permutation? '(10 20)) cl))
(test-assert (find (list-permutation? '("hi" "there")) cl))

;;; Check indexed-row and indexed-colum
(test-assert (same-map? (indexed-matrix-indexed-row im "a")
			'((1 . 10) (2 . "hi"))))
(test-assert (same-map? (indexed-matrix-indexed-row im "b")
			'((1 . 20) (2 . "there"))))
(test-assert (same-map? (indexed-matrix-indexed-column im 1)
			'(("a" . 10) ("b" . 20))))
(test-assert (same-map? (indexed-matrix-indexed-column im 2)
			'(("a" . "hi") ("b" . "there"))))

;;; Test internal helper routine hash-move-key
(define h (make-hash-table))
(hash-set! h "foo" 9001)
(test-eq "hash size" 1 (hash-table-size h))
(test-assert (hash-ref h "foo"))
(test-assert (equal? (hash-ref h "foo") 9001))
(hash-move-key h "foo" "bar")
(test-eq "hash size after move" 1 (hash-table-size h))
(test-assert (hash-ref h "bar"))
(test-assert (equal? (hash-ref h "bar") 9001))

;;; Test make-indexed-matrix/shared
(define im' (make-indexed-matrix/shared im))
(test-eq "shared width" 2 (indexed-matrix-width im'))
(test-eq "shared length" 2 (indexed-matrix-length im'))

(indexed-matrix-set! im' "you" "c" 2)
(test-eq "original width retained" 2 (indexed-matrix-width im))
(test-eq "original length retained" 2 (indexed-matrix-length im))
(test-eq "new length" 3 (indexed-matrix-length im'))
(test-assert "original column tags remain"
	     ((list-permutation? '(1 2))
	      (indexed-matrix-column-tags im)))
(test-assert "original row tags remain"
	     ((list-permutation? '("a" "b"))
	      (indexed-matrix-row-tags im)))
(test-assert "new row tags"
	     ((list-permutation? '("a" "b" "c"))
	      (indexed-matrix-row-tags im')))
(indexed-matrix-set! im' 13 "a" 1)
(test-eq "shared changes propagate"
	 13 (indexed-matrix-ref im "a" 1))

(test-end "indexed-matrix-test")
